#!/usr/bin/env python2
#coding=utf-8

import commands
import os
import sys
import time
import json
import errno
import getpass
from paramiko import SSHException
from optparse import OptionParser

from buddha import lsb
from buddha.lichbd import Lichbd, LichAdmin, LichNode

from cluster_meta import ClusterMeta
from utils import Exp, _dmsg, _dwarn, _derror,\
        _exec_shell, _exec_remote, _human_readable, \
        _str2hosts, _star2hosts, _search_hosts, _put_remote, \
        _sshkey, _exec_pipe, _exec_pipe1, mutil_exec, _check_hosts,\
        _isip, is_ping, _exec_shell1, time2str, _human_readable_time
from storage import Storage
from diskmap import name2site
from config import Config
from pool_manage import PoolManage

from metanode_balance_with_diskmap import balance as metanode_balance

from etcd_manage import Etcd_manage

# Exclude options appended when syncing /etc between nodes — presumably
# rsync/tar flags; the listed paths look node-local (TODO confirm caller).
etc_exclude = " --exclude=fusionstor --exclude=fusionnas --exclude=umpweb --exclude=imp --exclude=disk.conf --exclude=cache.conf --exclude=cpuset.worker "
# NOTE(review): appears to mark the lich version that introduced
# snapshot-tree support — confirm against consumers of this constant.
LICH_VERSION_SNAPTREE = 1


class Cluster(ClusterMeta):

    def __get_hosts(self, hosts):
        """Resolve a user-supplied host-spec list to known cluster hosts.

        Entries beginning with '-' are dropped; the remainder is expanded
        via _str2hosts and matched against the configured host list.

        Fix: the original popped from ``hosts`` while iterating
        ``range(len(hosts))``, which raises IndexError after a removal
        and skips the element shifted into the popped slot; filtering
        into a new list removes every '-' entry safely.
        """
        kept = [h for h in hosts if h[0] != '-']

        newlist = _str2hosts(kept)
        return _search_hosts(self.config.hosts.keys(), newlist)

    def get_hosts(self, hosts=None):
        """Return the resolved host selection, or every configured host
        when no selection is given."""
        if not hosts:
            return self.config.hosts.keys()
        return self.__get_hosts(hosts)

    def _node_list(self, hosts=None):
        """Resolve and validate *hosts*; default to every configured
        host when none are given."""
        if not hosts:
            return self.config.hosts.keys()
        resolved = self.__get_hosts(hosts)
        _check_hosts(resolved, self.config.nohosts)
        return resolved

    def _node_exec(self, host, cmd):
        """Run *cmd* on *host* over SSH and echo its stdout/stderr.

        Errors are reported via _derror rather than raised, so batch
        runs across many hosts keep going after one failure.
        """
        try:
            out, err = _exec_remote(host, cmd)
            if out:
                _dmsg("%s:\n%s" % (host, out))
            if err:
                _dwarn("%s:\n%s" % (host, err))
        except IOError as err:
            _derror("%s:%s %s" % (host, cmd, str(err)))
        except Exp as err:
            _derror("%s:%s %s" % (host, cmd, str(err)))
        except Exception as err:
            _derror("%s:%s %s" % (host, cmd, str(err)))

    def _etcd(self, host, statue, lst):
        """Invoke the node script's --etcd action on *host*, passing the
        members of *lst* as one comma-separated argument.

        :param statue: status/action token forwarded to the script
                       (parameter name kept for interface compatibility).
        """
        # ",".join replaces the original quadratic "s += '%s,'" build
        # plus trailing-comma trim; empty lst still yields "".
        members = ",".join(str(i) for i in lst)
        cmd = "%s --etcd %s %s" % (self.node_script, statue, members)
        print (cmd)
        out, err = _exec_remote(host, cmd)
        if out:
            _dmsg(host + ":\n" + out)
        if err:
            _dwarn(host + ":\n" + err)

    def _init(self, host):
        """Initialise the lich node on *host* for this cluster's name
        and record the shared-memory root path."""
        cmd = "%s --init %s" % (self.node_script, self.config.clustername)
        out, err = _exec_remote(host, cmd)
        if out:
            _dmsg(host + ":\n" + out)
        if err:
            _dwarn(host + ":\n" + err)
        self.shm = '/dev/shm/lich4'

    def _start(self, host):
        """Start the lich service on a single *host*."""
        return self._node_exec(host, "%s --start --ttyonly" % self.node_script)

    def _stop(self, host):
        """Stop the lich service on a single *host*."""
        return self._node_exec(host, "%s --stop --ttyonly" % self.node_script)

    def _restart(self, host):
        """Restart the lich service on a single *host*."""
        return self._node_exec(host, "%s --restart --ttyonly" % self.node_script)

    def start(self, hosts=None):
        """Start the service on *hosts* (default: whole cluster) in
        parallel."""
        targets = self._node_list(hosts)
        mutil_exec(self._start, [[h] for h in targets])

    def stop(self, hosts=None):
        """Stop the service on *hosts* (default: whole cluster) in
        parallel."""
        targets = self._node_list(hosts)
        mutil_exec(self._stop, [[h] for h in targets])

    def restart(self, hosts=None):
        """Restart the service on *hosts* (default: whole cluster) in
        parallel."""
        targets = self._node_list(hosts)
        mutil_exec(self._restart, [[h] for h in targets])

    def clean_cluster_dangerously(self, hosts=None):
        """Wipe node state on *hosts* (default: whole cluster) in
        parallel.  Destructive, as the name warns."""
        targets = self._node_list(hosts)
        wipe = "%s --clean_node_dangerously --ttyonly" % (self.node_script)
        mutil_exec(self._node_exec, [[h, wipe] for h in targets])

    def __show(self, hosts):
        """Collect per-node '--stat --json' output from every host in
        *hosts*, in parallel; unreachable or malformed replies are
        silently skipped."""
        collected = []

        def _collect(host, acc):
            cmd = "%s --stat --json" % (self.node_script)
            try:
                out, _ = _exec_remote(host, cmd)
                acc.extend(json.loads(out))
            except IOError:
                pass
            except ValueError:
                # reply was not valid JSON
                pass
            except Exp:
                pass

        mutil_exec(_collect, [[h, collected] for h in hosts])

        return collected

    def __show_unavailable(self):
        """Print N/A placeholders when cluster stats cannot be read."""
        for label in ('site', 'zone', 'node', 'disk', 'used', 'capacity'):
            _derror("%s:N/A" % label)

    def __show_online(self, d, name):
        """Print "<name>:<online>/<total>" for the pair in *d*.

        NOTE(review): the original if/else printed the exact same
        message in both branches (possibly a lost colouring
        distinction); the dead branch is collapsed here with behaviour
        unchanged.
        """
        _dmsg("%s:%d/%d" % (name, d[0], d[1]))

    def __show_offline1(self, host):
        """Scan *host* for offline chunks and merge them into
        ``self.off_line`` as {chunk .object id: [.raw replica ids]}.

        Fix: in the original, ``chkid`` was unbound (NameError) if a
        ".raw" line arrived before any ".object" header; such orphan
        lines are now skipped.
        """
        cmd = "%s --stat scan" % (self.node_script)
        try:
            out, err = _exec_remote(host, cmd)
            chkid = None
            for entry in out.splitlines():
                if entry.endswith(".object"):
                    chkid = entry
                    if chkid not in self.off_line:
                        self.off_line[chkid] = []
                elif entry.endswith(".raw"):
                    if chkid is None:
                        # orphan raw line with no preceding object header
                        continue
                    if entry not in self.off_line[chkid]:
                        self.off_line[chkid].append(entry)

        except IOError:
            _derror("offline node:%s" % host)
        except Exp:
            _derror("offline node:%s" % host)

    def __show_offline(self, hosts):
        """Gather offline-chunk information from *hosts* in parallel
        and print it, capping the listing at 100 chunks.

        Fixes: the two identical print loops are merged, and the typo
        in the truncation message ("unavailbe") is corrected.
        """
        self.off_line = {}

        mutil_exec(self.__show_offline1, [[h] for h in hosts])

        truncated = len(self.off_line) > 100
        for cnt, chkid in enumerate(self.off_line):
            if truncated and cnt == 100:
                break
            _dmsg("offline chunk:%s" % chkid)
            for rawid in self.off_line[chkid]:
                _dmsg("offline chunk:%s" % rawid)
        if truncated:
            _dmsg("too many unavailable chunk...")

    def show_create_time(self):
        """Print the cluster creation time via 'lich.admin --createtime'.

        :raises Exp: re-raised carrying errno plus its strerror text.
        """
        try:
            cmd = "%s --createtime" % (self.lich_admin)
            _exec_shell(cmd, retry=2, p=False)
        except Exp as e:  # was 'except Exp, e' — removed in Python 3
            raise Exp(e.errno, os.strerror(e.errno))

    def show(self, human = 0, verbose = False):
        """Print a cluster summary: online site/zone/node counts, pool
        usage, and (with *verbose*) a per-node capacity breakdown.

        :param human: non-zero -> format sizes via _human_readable
        :param verbose: also print each node's state and pool detail
        """
        newlist = self.config.hosts.keys()

        lst = self.__show(newlist)

        # lazily construct the Storage accessor on first use
        if self.storage is None:
            self.storage = Storage(self.config)

        try:
            total = len(self.storage.list_node())
        except Exp, e:
            # storage unreachable: print N/A placeholders and bail out
            self.__show_unavailable()
            self.show_create_time()
            exit(e.errno)

        # show node info
        try:
            sysstat = self.config.sysstat()
        except Exp as e:
            self.__show_unavailable()
            self.show_create_time()
            exit(e.errno)

        self.__show_online(sysstat['site'], 'site')
        self.__show_online(sysstat['rack'], 'zone')
        self.__show_online(sysstat['node'], 'node')
        #self.__show_online(sysstat['disk'], 'disk')

        # show pool info: aggregate used/total over running,
        # non-deleting nodes only
        pool = {}

        for i in lst:
            if (i['running'] == False) or (i.get('deleting')):
                continue

            if 'pool' in i:
                for p in i['pool']:
                    if p not in pool:
                        pool[p] = {}
                        pool[p]['total'] = 0
                        pool[p]['used'] = 0
                    pool[p]['total'] += int(i['pool'][p]['total'])
                    pool[p]['used'] += int(i['pool'][p]['used'])

        #print ('human %d' % (human));
        pool_manage = PoolManage(Config())
        pool_list = pool_manage.pool_list()
        _dmsg("pool:%s" % len(pool_list))

        if (human):
            for p in pool:
                _dmsg("%s:%s/%s" % (p, _human_readable(pool[p]['used']), _human_readable(pool[p]['total']) ))
        else:
            for p in pool:
                _dmsg("%s:%s/%d" % (p, pool[p]['used'], pool[p]['total']))

        if not verbose:
            self.show_create_time()
            return

        # show verbose info: per-node running state plus per-pool usage
        node_pool = {}
        for i in lst:
            hostname = str(i['hostname'])
            if i['running']:
                used = 0
                capacity = 0
                if 'pool' not in i:
                    continue
                for p in i['pool']:
                    pool_used = int(i['pool'][p]['used'])
                    pool_capacity = int(i['pool'][p]['total'])
                    used += pool_used
                    capacity += pool_capacity
                    if (human):
                        node_pool[str(p)] = {}
                        node_pool[str(p)]['used'] = _human_readable(pool_used)
                        node_pool[str(p)]['total'] = _human_readable(pool_capacity)
                    else:
                        node_pool[str(p)] = {}
                        node_pool[str(p)]['used'] = pool_used
                        node_pool[str(p)]['total'] = pool_capacity

                if (human):
                    _dmsg("%s:running(%s/%s = %s)" % (hostname, _human_readable(used), _human_readable(capacity), node_pool))
                else:
                    _dmsg("%s:running(%s/%s = %s)" % (hostname, used, capacity, node_pool))
            else:
                if i.get('deleting'):
                    _dmsg("%s:deleting" % (hostname))
                else:
                    _dmsg("%s:stopped" % (hostname))

        self.show_create_time()

    def ucarp(self):
        """Show ucarp (virtual-IP) state on every configured host, in
        parallel; per-host failures are logged, not raised."""
        def __ucarp_show(k):
            cmd = "%s --ucarp" % (self.node_script)
            try:
                (out, err) = _exec_remote(k, cmd)
                _dmsg("%s:%s" % (k, out.strip()))
            except IOError as err:
                _derror("ucarp show @ %s, ret %s" % (k, str(err)))
            except SSHException as err:
                _derror("ucarp show @ %s, ret %s" % (k, str(err)))
            except Exp as e:  # was 'except Exp, e' — removed in Python 3
                _derror("ucarp show %s:%s" % (k, e.err))

        args = [[x] for x in self.config.hosts.keys()]
        mutil_exec(__ucarp_show, args)

    def node_check_site(self, nodes):
        """Group *nodes* by storage area (site) and enforce the
        configured per-area node limit.

        :raises Exp: EINVAL when any site would exceed
                     config.storage_area_max_node members.
        """
        # e.g. {"site1": [...], "site2": [...]}
        sites = {}
        for node in nodes:
            sites.setdefault(name2site(node), []).append(node)

        for site in sites:
            if len(sites[site]) > self.config.storage_area_max_node:
                # message grammar fixed ("too more" -> "too many")
                msg = "too many nodes in one storage_area, %s, the limit was: %s" \
                    % (sites, self.config.storage_area_max_node)
                raise Exp(errno.EINVAL, msg)

    def node_clean(self):
        """Drop offline nodes, but only when it is safe to do so.

        Runs a 'scan' health pass first; nodes are dropped only when at
        least two nodes are online and no recovery work or scan failure
        is pending.  Otherwise the blocking condition is reported.
        """
        health = self.health(['scan'])
        analysis = self.analysis_health(health)
        # NOTE(review): analysis['recovery_need'] / 'recovery_lost' /
        # 'recovery_fail' / 'scan_fail' are per-pool dicts in
        # analysis_health, so comparing them to 0 here is never true —
        # confirm whether sum-over-pools was intended.
        if analysis['node_offline'] != 0 and\
                analysis['node_online'] >= 2 and\
                analysis['recovery_need'] == 0 and\
                analysis['recovery_lost'] == 0 and\
                analysis['recovery_fail'] == 0 and\
                analysis['scan_fail'] == 0:
            print "clean nodes", analysis['node_offline_list']
            try:
                self.drop_node(analysis['node_offline_list'])
            except Exp, e:
                _derror(e.err)
                exit(e.errno)
        elif analysis['node_online'] < 2:
            _derror('online node too few')
        elif analysis['node_offline'] != 0:
            # report what is blocking the drop
            _derror('can not drop node')
            if analysis['recovery_need'] != 0:
                _derror('recovery_need:%s' % analysis['recovery_need'])
            if analysis['recovery_lost'] != 0:
                _derror('recovery_lost:%s' % analysis['recovery_lost'])
            if analysis['recovery_fail'] != 0:
                _derror('recovery_fail:%s' % analysis['recovery_fail'])
            if analysis['scan_fail'] != 0:
                _derror('scan_fail:%s' % analysis['scan_fail'])
        else:
            _dwarn('no node need drop')

    def __get_pool_status(self, analysis, pool):
        """Derive a status string for *pool* from the analysis dict.

        Results: "Unavailable" (pool manager says so), "Readonly"
        (no capacity, or more than 98% full), "Degraded" (recovery
        pending), or whatever the pool manager reported otherwise.
        """
        pm = PoolManage(Config())
        status = pm.status(pool)

        if status.startswith("Unavailable"):
            return "Unavailable"

        total = analysis["disk_total"][pool]
        used = analysis["disk_used"][pool]

        # no capacity at all, or essentially full -> read only
        if int(total) == 0:
            status = "Readonly"
        elif used / float(total) > 0.98:
            status = "Readonly"

        # outstanding recovery work degrades the pool
        if analysis['recovery_need'][pool] > 0:
            status = "Degraded"

        return status

    def get_pools(self):
        """Return pool names by listing /lich4/storage in etcd."""
        out, _ = _exec_pipe1(["etcdctl", "ls", "/lich4/storage"],
                             retry=1, p=False)
        return [entry.split("/")[-1] for entry in out.split()]

    def __init_analysis(self, analysis):
        """Zero every per-pool counter for each existing pool; 'status'
        starts as None."""
        counters = (
            'disk_offline', 'disk_offline_list', 'disk_cache_list',
            'disk_total', 'disk_used',
            'recovery_total', 'recovery_success', 'recovery_lost',
            'recovery_offline', 'recovery_fail', 'recovery_need',
            'recovery_running', 'recovery_scanning', 'recovery_waiting',
            'recovery_status',
            'scan_fail', 'last_scan', 'speed',
        )
        for pool in self.get_pools():
            for key in counters:
                analysis[key][pool] = 0
            analysis['status'][pool] = None

    def health(self, ext):
        """Collect node-health JSON from every host, in parallel.

        :param ext: extra args forwarded to LichNode.health
                    (e.g. ['scan'] or ['clean']).
        :return: {host: parsed health dict}; unreachable, unparsable or
                 timed-out hosts get an offline stub — except during
                 'clean', where timeouts only warn.

        Fix: the two bare ``except:`` clauses are narrowed to
        ``except Exception:`` so KeyboardInterrupt/SystemExit are no
        longer swallowed.
        """
        # stub recorded for hosts we could not query
        self.offline_json = {
            "nodestat": {
                "running": False
            },
            "cachestat": {
            },
            "chunkstat": {
            },
            "diskstat": {
            }
        }
        try:
            d = self.__instences_by_lich_admin()
            hosts = d.keys()
        except Exception:
            # admin lookup failed: fall back to the configured host list
            hosts = self.config.hosts.keys()

        dlst = {}

        def __health_warp(host):
            lich_node = LichNode()
            r = lich_node.health(host, ext)
            try:
                dlst[host] = json.loads(r.strip('\n'))
            except Exception:
                # unparsable reply -> treat node as offline
                dlst[host] = self.offline_json

        timeout_args = []
        mutil_exec(__health_warp, [[x] for x in hosts], timeout=20, timeout_args=timeout_args)

        for i in timeout_args:
            if 'clean' not in ext:
                dlst[i[0]] = self.offline_json
            else:
                _dwarn("%s:time out" % (i[0]))

        return dlst

    def __check_pool_chunkstat_format(self, ps):
        for x in ['recovery', 'success', 'fail', 'lost', 'offline', 'speed', 'scan_fail', 'lastscan', 'status']:
            if x not in ps:
                return False
        return True

    def analysis_health(self, health):
        """Aggregate per-host health dicts (from self.health) into one
        summary dict.

        :param health: {host: {'nodestat':.., 'cachestat':..,
                        'diskstat':.., 'chunkstat':..}}
        :return: dict with node_* scalars/lists and per-pool counter
                 dicts (disk_*, recovery_*, scan_fail, last_scan,
                 status, speed).
        """
        analysis = {}
        analysis['node_offline'] = 0
        analysis['node_online'] = 0
        analysis['node_offline_list'] = []
        analysis['disk_offline'] = {}
        analysis['disk_offline_list'] = {}
        analysis['disk_cache_list'] = {}
        analysis['disk_total'] = {}
        analysis['disk_used'] = {}
        analysis['recovery_total'] = {}
        analysis['recovery_success'] = {}
        analysis['recovery_lost'] = {}
        analysis['recovery_offline'] = {}
        analysis['recovery_fail'] = {}
        analysis['recovery_need'] = {}
        analysis['recovery_running'] = {}
        analysis['recovery_scanning'] = {}
        analysis['recovery_waiting'] = {}
        analysis['recovery_status'] = {}
        analysis['scan_fail'] = {}
        analysis['scan_fail_list'] = []
        analysis['last_scan'] = {}
        analysis['status'] = {}
        analysis['speed'] = {}

        # pre-zero counters for every pool currently known to etcd
        self.__init_analysis(analysis)

        for node in health:
            # cachestat: collect human-readable cache-setting advice
            cachestat = []
            for disk in health[node]['cachestat']:
                msg = 'need set %s ' % str(disk)
                for cache in health[node]['cachestat'][disk]:
                    msg += '%s to %s,' %(str(cache), str(health[node]['cachestat'][disk][cache]))
                cachestat.append(msg)

            if len(cachestat):
                analysis['disk_cache_list'][node] = cachestat
            if not health[node]['nodestat']['running']:
                analysis['node_offline'] += 1
                analysis['node_offline_list'].append(node)
            else:
                analysis['node_online'] += 1

            # diskstat: per-pool disk counts and capacity totals
            for pool in health[node]['diskstat']:
                if pool not in analysis['disk_offline']:
                    analysis['disk_offline'][pool] = 0
                if pool not in analysis['disk_total']:
                    analysis['disk_total'][pool] = 0
                if pool not in analysis['disk_used']:
                    analysis['disk_used'][pool] = 0

                # per-disk entry shape, e.g.:
                # {
                # u'wbcount': u'0',
                # u'total': u'915712',
                # u'cached': u'0',
                # u'cache': u'0',
                # u'used': u'24109',
                # u'wbused': u'0',
                # u'online': u'0',
                # u'wbtotal': u'0',
                # u'disk': u'1'
                # }
                ds = health[node]['diskstat'][pool]

                for disk in ds:
                    if 'online' in ds[disk]:
                        if ds[disk]['online'] == '0':
                            # print '**', node, pool, disk, ds[disk]
                            analysis['disk_offline'][pool] += 1

                            # NOTE: disk_offline_list is keyed by node
                            # here, not by pool
                            if node not in analysis['disk_offline_list']:
                                analysis['disk_offline_list'][node] = []
                            analysis['disk_offline_list'][node].append(str(disk))

                    analysis['disk_total'][pool] += int(ds[disk]["total"])
                    analysis['disk_used'][pool] += int(ds[disk]["used"])

            # chunkstat: per-pool recovery progress counters
            for pool in health[node]['chunkstat']:
                if not health[node]['chunkstat'][pool]:
                    continue

                # pool may not exist in etcd yet; zero it on first sight
                if pool not in analysis['recovery_total']:
                    analysis['recovery_total'][pool] = 0
                    analysis['recovery_success'][pool] = 0
                    analysis['recovery_lost'][pool] = 0
                    analysis['recovery_offline'][pool] = 0
                    analysis['recovery_fail'][pool] = 0
                    analysis['recovery_need'][pool] = 0
                    analysis['recovery_running'][pool] = 0
                    analysis['recovery_scanning'][pool] = 0
                    analysis['recovery_waiting'][pool] = 0
                    analysis['recovery_status'][pool] = 0
                    analysis['speed'][pool] = 0
                    analysis['scan_fail'][pool] = 0

                ps = health[node]['chunkstat'][pool]

                # skip malformed entries rather than KeyError below
                if not self.__check_pool_chunkstat_format(ps):
                    _dwarn("node %s pool %s: %s" % (node, pool, ps))
                    continue

                analysis['recovery_total'][pool] += int(ps['recovery'])
                analysis['recovery_lost'][pool] += int(ps['lost'])
                analysis['recovery_offline'][pool] += int(ps['offline'])
                analysis['recovery_success'][pool] += int(ps['success'])
                analysis['recovery_fail'][pool] += int(ps['fail'])
                analysis['recovery_need'][pool] += int(ps['recovery']) - int(ps['success']) - int(ps['fail'])
                analysis['speed'][pool] += int(ps['speed'])
                analysis['scan_fail'][pool] += int(ps['scan_fail'])
                # keep the most recent scan timestamp across nodes
                if int(ps['lastscan']) > analysis['last_scan'][pool]:
                    analysis['last_scan'][pool] = int(ps['lastscan'])

                if ps['status'] == 'running':
                    analysis['recovery_running'][pool] += 1
                elif ps['status'] == 'scanning':
                    analysis['recovery_scanning'][pool] += 1
                else:
                    analysis['recovery_waiting'][pool] += 1

                # recovery_status is nested {node: {pool: [statuses]}}
                if node not in analysis['recovery_status']:
                    analysis['recovery_status'][node] = {}
                if pool not in analysis['recovery_status'][node]:
                    analysis['recovery_status'][node][pool] = []
                analysis['recovery_status'][node][pool].append(str(ps['status']))
                # analysis['recovery_status'][node][pool] = str(health[node]['chunkstat'][pool]['status'])
        return analysis

    def show_health(self, analysis, verbose, ext):
        """Print the summary produced by analysis_health.

        :param analysis: dict returned by analysis_health
        :param verbose: print full lists/status maps instead of counts
        :param ext: accepted for interface symmetry; not read here
        """
        if verbose:
            print "node offline :", analysis['node_offline_list']
        else:
            print "node offline :", analysis['node_offline']

        if verbose:
            print "disk cache :", analysis['disk_cache_list']

        if verbose:
            print "   disk offline :", analysis['disk_offline_list']

        for pool in analysis['recovery_total']:
            # pools with chunkstat but no diskstat are skipped
            if pool not in analysis['disk_total']:
                continue

            print pool, "info :"

            if pool in analysis['disk_offline']:
                print "   disk offline :", analysis['disk_offline'][pool]

            if not analysis['recovery_total']:
                continue

            print "   disk total:", analysis['disk_total'][pool]
            print "   disk used:", analysis['disk_used'][pool]
            print "   chunk recovery total :", analysis['recovery_total'][pool]
            print "   chunk recovery offline:", analysis['recovery_offline'][pool]
            print "   chunk recovery lost :", analysis['recovery_lost'][pool]
            print "   chunk recovery success :", analysis['recovery_success'][pool]
            print "   chunk recovery fail :", analysis['recovery_fail'][pool]
            print "   chunk need recovery :", analysis['recovery_need'][pool]
            if verbose:
                print "   chunk recovery status :", analysis['recovery_status']
            else:
                print "   chunk recovery scanning :", analysis['recovery_scanning'][pool]
                print "   chunk recovery running :", analysis['recovery_running'][pool]
                print "   chunk recovery waiting :", analysis['recovery_waiting'][pool]
                print "   chunk recovery speed :", analysis['speed'][pool]

            print "   status  : %s" % (self.__get_pool_status(analysis, pool))

            # last_scan is a unix timestamp; 0 means never recorded
            time_now = int(time.time())
            if analysis['last_scan'][pool] == 0:
                print "   last scan :", "long time ago"
            elif time_now - analysis['last_scan'][pool] < 0:
                print "   last scan :", "just now"
            else:
                print "   last scan :", _human_readable_time(time_now - analysis['last_scan'][pool]) + "ago"

    def __get_capacity(self, nodeinfo):
        #  {"lich_test1":{'total':0, 'used':0}}
        hostname = nodeinfo['hostname']
        d_pool = {}
        d = dict({'total':0, 'used':0})
        d_pool[hostname] = d

        if 'pool' in nodeinfo:
            for p in nodeinfo['pool']:
                d_pool[hostname]['total'] += int(nodeinfo['pool'][p]['total'])
                d_pool[hostname]['used'] += int(nodeinfo['pool'][p]['used'])

        return d_pool

    def __listnode(self, k):
        """Fetch '--stat --json' from host *k*; return the parsed list,
        or [] on any transport/parse error (logged as a warning)."""
        cmd = "%s --stat --json" % (self.node_script)
        try:
            (out, err) = _exec_remote(k, cmd)
            return json.loads(out)
        except IOError as e:
            _dwarn("%s:%s" % (k, e))
        except SSHException as e:
            _dwarn("%s:%s" % (k, e))
        except ValueError as e:
            _dwarn("%s:unknow(%s)" % (k, e))
        except Exp as e:  # was 'except Exp, e' — removed in Python 3
            _dwarn("%s:%s" % (k, e.err))

        return []

    def list_xx(self, xx):
        if xx == 'storagearea':
            bd = Lichbd()
            print bd.list_storagearea()
            return

    def listnode(self, hosts, verbose):
        """List the status of each node in *hosts*.

        :param hosts: host-spec list (resolved via get_hosts)
        :param verbose: include gateway/latency/capacity/uptime detail
        """
        hosts = self.get_hosts(hosts)

        def __listnode_warp(host, lst):
            r = self.__listnode(host)
            lst.extend(r)

        lst = []
        args = [[x, lst] for x in hosts]
        timeout_args = []
        mutil_exec(__listnode_warp, args, timeout = 5, timeout_args = timeout_args)

        if (verbose):
            for i in lst:
                host = i['hostname']
                d_pool = self.__get_capacity(i)
                # 'load' appears to be seconds; x1000 -> milliseconds
                load = float(i['load']) * 1000
                if (i['running'] == False and i['starting'] == False):
                    _dmsg("%s:{status:stopped, gateway:%s, latency:%fms, used:%s, total:%s, uptime:%s, nid:%s}" %
                                    (host, i['gateway'], load, _human_readable(d_pool[host]['used']),
                                            _human_readable(d_pool[host]['total']), time2str(int(i['uptime'])), i['nid']))
                elif (i['starting'] == True):
                    # sub-0.1ms latencies are reported as 0
                    if (load < 0.1):
                        load = 0
                    _dmsg("%s:{status:%s,starting, gateway:%s, latency:%fms, used:%s, total:%s, uptime:%s, nid:%s}" %
                                    (host, i['gateway'], i['status'], load, _human_readable(d_pool[host]['used']),
                                            _human_readable(d_pool[host]['total']), time2str(int(i['uptime'])), i['nid']))
                elif ('deleting' in i):
                    if (load < 0.1):
                        load = 0
                    _dmsg("%s:{status:%s,deleting, gateway:%s, latency:%fms, used:%s, total:%s, uptime:%s, nid:%s}" %
                                    (host, i['status'], i['gateway'], load, _human_readable(d_pool[host]['used']),
                                            _human_readable(d_pool[host]['total']), time2str(int(i['uptime'])), i['nid']))
                else:
                    if (load < 0.1):
                        load = 0
                    _dwarn("%s:{status:%s, gateway:%s, latency:%fms, used:%s, total:%s, uptime:%s, nid:%s}" %
                                    (host, i['status'], i['gateway'], load, _human_readable(d_pool[host]['used']),
                                            _human_readable(d_pool[host]['total']), time2str(int(i['uptime'])), i['nid']))
        else:
            for i in lst:
                if (i['running'] == False and i['starting'] == False):
                    _dwarn("%s:stopped" % (i['hostname']))
                elif (i['starting'] == True):
                    _dwarn("%s:%s,starting" % (i['hostname'], i['status']))
                elif ('deleting' in i):
                    _dwarn("%s:%s,deleting" % (i['hostname'], i['status']))
                else:
                    _dmsg("%s:%s" % (i['hostname'], i['status']))

        # hosts that did not answer within the timeout
        for arg in timeout_args:
            host = arg[0]
            _derror("%s: %s" % (host, 'timeout'))

    def _update(self, dist, src, backup, op, hosts = None, force=False, password=None):
        """Push *src* (a tar.gz) to every host and unpack it at *dist*,
        letting the node script back up the previous contents first.

        :param dist: absolute install directory on the remote host
        :param src: local tar.gz to upload
        :param backup: backup root directory on the remote host
        :param op: label used in backup/temp file names and messages
        :param hosts: {host: ...} mapping; defaults to all configured
        :param force: skip the backup path, unpack directly
        :param password: optional SSH password for the remote calls
        """
        dist = os.path.abspath(dist)
        src = os.path.abspath(src)
        backup = os.path.abspath(backup)

        bak_dir = backup
        bak_day = '%s/%s' % (bak_dir, time.strftime('%Y-%m-%d'))
        bak_time = time.strftime('%H:%M:%S')
        bak_dir = '%s/%s-%s.tar.gz' % (bak_day, op, bak_time)
        tmp_update = '/tmp/%s-tmp-%s.tar.gz' % (op, bak_time)

        if (hosts == None):
            hosts = self.config.hosts

        # prefer the node script (it backs up first); fall back to a
        # plain untar when the script is not installed yet
        cmd1 = "%s --update %s %s %s" % (self.node_script, dist, tmp_update, bak_dir)
        cmd2 = "mkdir -p %s && tar xf %s -C %s > /dev/null && rm %s " % (dist, tmp_update, dist, tmp_update)
        cmd = 'if [ -f %s ]; then %s; else %s; fi  ' % (self.node_script, cmd1, cmd2)

        def _put_remote_warp(k):
            _dmsg('update %s to %s' % (op, k))
            # upload then unpack; errors are logged per host so the
            # remaining hosts still get updated
            try:
                _put_remote(k, src, tmp_update, password=password)
                if force:
                    (out, err) = _exec_remote(k, cmd2, password=password)
                else:
                    (out, err) = _exec_remote(k, cmd, password=password)

                if (out):
                    _dmsg(k + ":\n" + out)
                if (err):
                    _dwarn(k + ":\n" + err)

            except Exp as e:  # was 'except Exp, e' — removed in Python 3
                _derror("%s : %s" % (k, e))

        args = [[k] for (k, v) in hosts.items()]
        mutil_exec(_put_remote_warp, args)

    def sshkey(self, newlist, password = None):
        """Deploy SSH keys to the hosts in *newlist* and synchronise
        known_hosts across the whole cluster.

        :param newlist: hosts to key; prompted password is reused
        :param password: SSH password; prompted for when not given
        """
        if (password == None):
            password = raw_input("input password:")

        for i in newlist:
            try:
                print("deploy sshkey for " + i)
                _sshkey(i, password)
                _exec_pipe([self.config.lich + "/admin/gen_hostkey.sh", i])
            except SSHException as err:
                _derror("%s:%s" % (i, str(err)))
                continue
            except Exp as e:  # was 'except Exp, e' — removed in Python 3
                _derror("%s:%s" % (i, e.err))
                continue

        # snapshot the local known_hosts, push it to every existing and
        # every new host, then clean up the temp copy
        os.system("cat /root/.ssh/known_hosts > /tmp/known_hosts")

        for i in self.config.hosts.keys():
            try:
                _put_remote(i, "/tmp/known_hosts", "/root/.ssh/known_hosts")
            except SSHException as err:
                _derror("%s:%s" % (i, str(err)))
                continue
            except Exp as e:
                _derror("%s:%s" % (i, e.err))
                continue

        for i in newlist:
            try:
                _put_remote(i, "/tmp/known_hosts", "/root/.ssh/known_hosts", 'root', password)
            except SSHException as err:
                _derror("%s:%s" % (i, str(err)))
                continue
            except Exp as e:
                _derror("%s:%s" % (i, e.err))
                continue

        os.system("rm -f  /tmp/known_hosts")

    def __sync_hosts(self, hostsfile, hosts):
        """Copy *hostsfile* to the hosts-conf path on every host in
        *hosts*, creating the etc directory first; per-host failures
        are logged and skipped."""
        _dmsg("sync hosts to " + str(hosts))
        for host in hosts:
            try:
                _exec_remote(host, "mkdir -p %s/etc" % (self.config.home))
                _put_remote(host, hostsfile, self.config.hosts_conf)
            except SSHException as err:
                _derror("%s:%s" % (host, str(err)))
                continue
            except Exp as e:  # was 'except Exp, e' — removed in Python 3
                _derror("%s:%s" % (host, e.err))
                continue

    def __set_host(self, hosts):
        """Set each host's system hostname (RHEL-style sysconfig) to
        match its cluster name; per-host failures are logged and
        skipped."""
        for host in hosts:
            cmd = "sed -i 's/HOSTNAME=.*/HOSTNAME=%s/g' /etc/sysconfig/network && hostname %s && export PS1='[\u@\H \w]# '" % (host, host)
            try:
                _exec_remote(host, cmd)
            except SSHException as err:
                _derror("%s:%s" % (host, str(err)))
                continue
            except Exp as e:  # was 'except Exp, e' — removed in Python 3
                _derror("%s:%s" % (host, e.err))
                continue

    def __prep(self, host, password):
        """Bootstrap a single host over SSH as root using *password*:
        set its hostname (unless nohosts mode) and install the staged
        hosts file under the configured path.
        """
        print("prep env for " + host)

        if not self.config.nohosts:
            # NOTE(review): lsb_release() runs on the LOCAL machine; this
            # assumes the remote distro matches — confirm.
            (distro, release, codename) = lsb.lsb_release()
            # Ubuntu keeps the hostname in /etc/hostname; RH-style systems
            # use /etc/sysconfig/network.
            if distro == 'Ubuntu':
                cmd = "echo %s>/etc/hostname && hostname %s && export PS1='[\u@\H \w]# '" % (host, host)
            else:
                cmd = "sed -i 's/HOSTNAME=.*/HOSTNAME=%s/g' /etc/sysconfig/network && hostname %s && export PS1='[\u@\H \w]# '" % (host, host)
            _exec_remote(host, cmd, "root", password)

        cmd = "mkdir -p %s" % (os.path.dirname(self.config.hosts_conf))
        _exec_remote(host, cmd, "root", password)
        # /tmp/hosts is staged by the caller (prep) from self.config.hosts_conf
        _put_remote(host, "/tmp/hosts", self.config.hosts_conf, "root", password)
        #_put_remote(host, "/tmp/known_hosts", "/root/.ssh/known_hosts", "root", password)

    def prep(self, hosts, password=None):
        newlist = _str2hosts(hosts)
        newlist = _star2hosts(newlist)
        _check_hosts(newlist, self.config.nohosts)
        for i in newlist:
            if not self.config.nohosts and _isip(i):
                raise Exp(errno.EINVAL, "IP address only used for nohosts type")
            if self.config.nohosts and not _isip(i):
                raise Exp(errno.EINVAL, "nohosts type only allow IP address")

        if not password:
            password = raw_input("input password:")

        self.sshkey(newlist, password)

        os.system("cat "+ self.config.hosts_conf +" > /tmp/hosts")

        for i in newlist:
            try:
                self.__prep(i, password);
            except SSHException as err:
                _derror("%s:%s" % (i, str(err)))
                continue
            except Exp, e:
                _derror("%s:%s" % (i, e.err))
                continue

        os.system("rm -f /tmp/hosts")

    def update(self, op, src=None, hosts=None, force=False, password=None):
        """Distribute an update package of kind *op* ('etc'|'lich'|'qemu').

        When *src* is omitted a tarball is built from the local tree via
        _get_src(); otherwise *src* must name an existing file.  The target
        and backup directories are derived from *op*, then _update() does
        the actual distribution.
        """
        if src is None:
            src = self._get_src(op)
            # _get_src may legitimately yield None (e.g. empty qemu dir)
            if src is None:
                return
        elif not os.path.exists(src):
            # a leading '-' means the caller passed an option, not a path
            if src.startswith('-'):
                raise Exp(errno.EINVAL, "%s not support, use --help for help" % src)
            raise Exp(errno.ENOENT, "%s file not found" % src)

        targets = {
            "etc": (self.config.home + "/etc", self.config.home + "/tmp/backup/etc"),
            "lich": (self.config.lich, self.config.home + "/tmp/backup/lich"),
            "qemu": (self.config.home + "/qemu", self.config.home + "/tmp/backup/qemu"),
        }
        if op not in targets:
            raise Exp(errno.EINVAL, "not support %s" %(op))
        dist = os.path.abspath(targets[op][0])
        backup = os.path.abspath(targets[op][1])

        self._update(dist, src, backup, op, hosts, force, password)

    def _get_src(self, op):
        """Build a tarball of the local tree for *op* and return its path.

        Returns None for 'qemu' when the qemu directory is absent or empty.
        Raises Exp on an unknown *op* or a failed tar command.
        """
        src_tar = None
        if (op == "etc"):
            src_file = os.path.abspath(self.config.home + "/etc")
            src_tar = "/tmp/lich_etc.tar.gz"
        elif (op == "lich"):
            src_file = os.path.abspath(self.config.lich)
            src_tar = "/tmp/lich.tar.gz"
        elif (op == "qemu"):
            src_file = os.path.abspath(self.config.home + "/qemu")
            # nothing to package: src_tar is still None at this point
            if not os.path.exists(src_file) or not os.listdir(src_file):
                return src_tar
            src_tar = "/tmp/qemu.tar.gz"
        else:
            raise Exp(errno.EINVAL, "not support %s" %(op))
        cmd = "rm -rf %s && cd %s && tar czvf %s * %s"%(src_tar, src_file, src_tar, etc_exclude)
        out = commands.getstatusoutput(cmd)
        if out[0] == 0:
            return src_tar
        raise Exp(out[0], out[1].strip())

    def version(self):
        # Print lichd's version string; the trailing comma suppresses the
        # extra newline a bare print statement would add.
        print _exec_pipe([self.config.lichd, '-v'], p=False),

    def log_clean(self):
        def __log_clean(k):
            cmd = "%s --log clean" % (self.node_script)
            try:
                (out, err) = _exec_remote(k, cmd)
                _dmsg("clean log @ %s" % (k))
            except IOError as err:
                _derror("clean log @ %s, ret %s" % (k, str(err)))
            except SSHException as err:
                _derror("clean log @ %s, ret %s" % (k, str(err)))
            except Exp, e:
                _derror("clean %s:%s" % (k, e.err))

        args = [[x] for x in self.config.hosts.keys()]
        mutil_exec(__log_clean, args)

    def log_collect(self, begin, end):
        """Collect logs in [begin, end] from every node into one tarball.

        Each node writes /root/collect-<host>-log*; those files are scp'd
        into the local /root/collect-log directory and packed as
        /root/collect-log-<timestamp>.tar.gz, then the staging dir is removed.
        """
        # quote the timestamps so the remote shell sees them as one argument
        begin = '"' + begin + '"'
        end= '"' + end + '"'
        for (k, v) in self.config.hosts.items():
            cmd = "%s --log collect %s %s" % (self.node_script, begin, end)
            try:
                _dmsg("collect log @ %s" % (k))
                (out, err) = _exec_remote(k, cmd)
            except IOError as err:
                _derror("collect log @ %s, ret %s" % (k, str(err)))
            except SSHException as err:
                _derror("collect log @ %s, ret %s" % (k, str(err)))
            except Exp, e:
                _derror("collect %s:%s" % (k, e.err))
                continue

        os.system('mkdir -p /root/collect-log')
        for (k, v) in self.config.hosts.items():
            # NOTE(review): os.system does not raise IOError/SSHException/Exp;
            # scp failures are only visible in its (ignored) exit status.
            try:
                os.system('scp root@%s:/root/collect-%s-log* /root/collect-log'%(k, k))
            except IOError as err:
                _derror("collect log @ %s, ret %s" % (k, str(err)))
            except SSHException as err:
                _derror("collect log @ %s, ret %s" % (k, str(err)))
            except Exp, e:
                _derror("collect %s:%s" % (k, e.err))
                continue

            # best-effort cleanup of the per-node staging files
            try:
                os.system('ssh root@%s rm -rf /root/collect-%s-log*' % (k, k))
            except Exp, e:
                pass

        collect_time = str(int(time.time()))
        try:
            os.system("cd /root && tar zcf collect-log-%s.tar.gz collect-log" % collect_time)
        except Exception, e:
            # don't leave a partial tarball behind
            os.system("rm -rf /root/collect-log-%s.tar.gz" % collect_time)
            raise

        os.system("rm -rf /root/collect-log")

    def log_tail(self):
        def __log_tail(k):
            cmd = "%s --log tail" % (self.node_script)
            try:
                (out, err) = _exec_remote(k, cmd)
                _dmsg("tail log @ %s" % (k))
                print out
            except IOError as err:
                _derror("tail log @ %s, ret %s" % (k, str(err)))
            except SSHException as err:
                _derror("tail log @ %s, ret %s" % (k, str(err)))
            except Exp, e:
                _derror("tail %s:%s" % (k, e.err))

        args = [[x] for x in self.config.hosts.keys()]
        mutil_exec(__log_tail, args)

    def log_backup(self):
        bak_time = time.strftime('%Y%m%d%H%M%S')

        def __log_backup(k, bak_time):
            cmd = "%s --log backup %s" % (self.node_script, bak_time)
            try:
                (out, err) = _exec_remote(k, cmd)
                _dmsg("backup log @ %s" % (k))
            except IOError as err:
                _derror("backup log @ %s, ret %s" % (k, str(err)))
            except SSHException as err:
                _derror("backup log @ %s, ret %s" % (k, str(err)))
            except Exp, e:
                _derror("backup log %s:%s" % (k, e.err))

        args = [[x, bak_time] for x in self.config.hosts.keys()]
        mutil_exec(__log_backup, args)

    def _update_qemu(self, hosts = None, force = False):
        """Tar the local qemu tree and push it via update(); silently a
        no-op when the qemu directory is missing or empty."""
        qemu_path = os.path.abspath(self.config.home + "/qemu")
        if not os.path.exists(qemu_path) or not os.listdir(qemu_path):
            return

        tmpdir = os.path.abspath(self.config.home + "/tmp")
        os.system("mkdir -p " + tmpdir)
        os.system("rm -rf %s/*" % (tmpdir))

        tarball = tmpdir + "/qemu.tar.gz"
        os.system("cd %s && tar czf %s * %s" % (qemu_path, tarball, etc_exclude))
        self.update("qemu", tarball, hosts, force)

    def _update_etc(self, hosts = None, force = False):
        """Tar the local etc tree (minus the excluded entries) and push it
        via update()."""
        tmpdir = os.path.abspath(self.config.home + "/tmp")
        os.system("mkdir -p " + tmpdir)
        os.system("rm -rf %s/*" % (tmpdir))

        tarball = tmpdir + "/etc.tar.gz"
        etc_path = os.path.abspath(self.config.home + "/etc")
        os.system("cd %s && tar czf %s * %s" % (etc_path, tarball, etc_exclude))
        self.update("etc", tarball, hosts, force)

    def _update_lich(self, hosts = None, force = False):
        """Tar the local lich tree and push it via update().

        Note: uses self.config.tmp as scratch space (the etc/qemu variants
        use <home>/tmp).
        """
        tmpdir = os.path.abspath(self.config.tmp)
        os.system("mkdir -p " + tmpdir)
        os.system("rm -rf %s/*" % (tmpdir))

        tarball = tmpdir + "/lich.tar.gz"
        os.system("cd %s && tar czf %s *" % (os.path.abspath(self.config.lich), tarball))
        self.update("lich", tarball, hosts, force)

    def _set_version(self):
        def __set_version(k):
            path = os.path.join(self.shm, "sysctl/setversion")
            cmd = "mkdir -p %s;echo %s > %s" % (os.path.dirname(path), LICH_VERSION_SNAPTREE, path)
            try:
                (out, err) = _exec_remote(k, cmd)
            except IOError as err:
                _derror("set version @ %s, ret %s" % (k, str(err)))
            except SSHException as err:
                _derror("set version @ %s, ret %s" % (k, str(err)))
            except Exp, e:
                _derror("set version %s:%s" % (k, e.err))

        args = [[x] for x in self.config.hosts.keys()]
        mutil_exec(__set_version, args)

    def create(self, hosts):
        """Create a new cluster on *hosts*.

        Expands and validates the host list, requires the local node to be
        part of it, checks every node's environment, writes the cluster
        config, then brings the nodes up and reports per-node failures.

        Raises:
            Exp: on a bad host list, missing local hostname, or env errors.
        """
        newlist = _str2hosts(hosts)
        newlist = _star2hosts(newlist)
        _check_hosts(newlist, self.config.nohosts)
        self.node_check_site(newlist)

        if self.config.hostname == 'N/A':
            raise Exp(errno.ENOENT, "network config error, please check hosts.conf or ifconfig")
        if (self.config.hostname not in newlist):
            # the node running the command must be a member of the new cluster
            raise Exp(errno.EINVAL, "need include %s" %(self.config.hostname))
        for host in newlist:
            self.__addnode_check_env(host)

        self.config.generateuuid()
        self.config.createcluster(newlist)
        #self.config.addnode(newlist)
        #self._update_etc()

        self.__addnode(newlist, "new", [])
        stat = self.__show(newlist)

        error = 0
        instences = []
        for i in stat:
            instences.append(i['hostname'])

            if (i['running'] == False):
                _derror("disk %s fail" % (i['hostname']))
                error = error + 1

        if (error > 0):
            # bug fix: _derror takes a single message argument (see every
            # other call site); the old call passed the count as a bogus
            # extra positional argument.
            #raise Exp(error, "create fail, error %u" % (error))
            _derror("create fail, error %u" % (error))

        #self._createcluster1(instences)
        #self._checkmeta(len(instences))
        #self._set_version()

    def deploy(self, host):
        """Force-push lich binaries and etc config to a single *host*,
        prompting interactively for its root password."""
        password = getpass.getpass("input the root's password of %s: " % (host))
        hosts = {host: 1}

        for op in ('lich', 'etc'):
            self.update(op, src=None, hosts=hosts, password=password, force=True)

    def init(self):
        # Deprecated entry point kept for backward compatibility: creates a
        # single-node cluster on the local host.
        _derror("init cmd will be removed, do not use it")
        self.create([self.config.hostname])

    def __check_offline(self):
        """Trigger a health scan and poll (every 3s) until no pool reports
        scan/recovery problems; give up with Exp(EPERM) after 60 seconds.
        """
        now = time.time()
        self.health(["scan"])
        while True:
            now2 = time.time()
            if (now2-now > 60):
                raise Exp(errno.EPERM, "need recovery first")

            analysis = self.analysis_health(self.health([]))
            need_retry = 0
            # any non-zero counter in any pool forces another round
            for pool in analysis['recovery_fail']:
                if analysis["scan_fail"][pool]:
                    need_retry = need_retry + 1
                    print 'scan_fail: %d' % (analysis["scan_fail"][pool])
                if analysis["recovery_offline"][pool]:
                    need_retry = need_retry + 1
                    print 'offline: %d' % (analysis["recovery_offline"][pool])
                '''
                if (int(now) > analysis["last_scan"]):
                    need_retry = need_retry + 1
                    print 'last_scan: %d' % (analysis["last_scan"])
                '''
                if (analysis['recovery_fail'][pool]):
                    need_retry = need_retry + 1
                    print 'recovery fail: %d' % (analysis["recovery_fail"][pool])
                if (analysis['recovery_lost'][pool]):
                    need_retry = need_retry + 1
                    print 'recovery lost: %d' % (analysis["recovery_lost"][pool])

            if (need_retry):
                time.sleep(3)
                continue
            else:
                break

    def addnode(self, list1, force=False):
        """Add nodes to the cluster one at a time.

        For every entry in *list1*: expand the host pattern, validate the
        environment, register the node in the cluster config and in etcd,
        deploy and start it, then verify it came up.

        Raises:
            Exp: when a freshly added node's disk fails to start.
        """
        # check etcd status before add node
        try:
            self.__addnode_pre_check_etcd()
        except Exp, e:
            _derror(e.err)
            exit(e.errno)

        for i in list1:
            one = []
            one.append(i)

            # lazily construct the Storage accessor on first use
            if (self.storage == None):
                self.storage = Storage(self.config)

            oldlist = self.storage.list_node()
            newlist = _str2hosts(one)
            newlist = _star2hosts(newlist)
            _check_hosts(newlist, self.config.nohosts)
            self.node_check_site(newlist + oldlist)

            for host in newlist:
                self.__addnode_check_env(host)

            #if not force:
            #    self.__check_offline()
            self.config.addnode(newlist)

            # NOTE(review): this loop shadows the outer variable `i`;
            # harmless today because the outer value is not read afterwards.
            for i in newlist:
                os.system("etcdctl member add %s http://%s:2380" % (i, i))

            self.__addnode(newlist, "existing", oldlist)

            stat = self.__show(newlist)

            error = 0
            instences = []
            for i in stat:

                instences.append(i['hostname'])

                if (i['running'] == False):
                    _derror("disk %s fail" % (i['hostname']))
                    error= error + 1

            if (error > 0):
                raise Exp(error, "add node fail, error %u" % (error))

            self._addnode1(instences)
            self._checkmeta(len(instences) + len(oldlist))

    def __addnode_pre_check_etcd(self):
        """Refuse to proceed unless the etcd service reports healthy.

        Raises:
            Exp(EPERM): when etcd is not healthy.
        """
        if not Etcd_manage.etcd_is_health():
            raise Exp(errno.EPERM, "Etcd status is unhealth, please check etcd status")

    def __addnode_check_env(self, host):
        """Verify *host* is safe to join the cluster.

        The remote shell script checks: the data directory is empty (aside
        from a few allowed entries), the iscsi port is free, the remote
        distro matches the local one, no lichd process is running, and the
        remote clock is within 60 seconds of ours.  Any violation is
        reported on the remote stderr and this process exits.
        Skipped entirely in testing mode.
        """
        if (self.config.testing):
            return

        # distro of the LOCAL machine; the remote side must report the same
        release = _exec_pipe(['python', '-c', "import platform;print platform.dist()[0]"], p=False).split()[-1]
        cmd = '''
        home="%s"
        if [ -e "$home/data" ]; then
            has_child=0

            for disk in `ls $home/data`; do
                if [ $disk == '.' -o $disk == '..' ]; then
                    continue
                fi
                if [ $disk == 'disk' -o $disk == 'fake' -o $disk == 'wlog' ]; then
                    continue
                fi

                if [ $disk == 'etcd' ]; then
                    for etcd in `ls $home/data/etcd`; do
                        if [ $etcd == 'member' ]; then
                            for member in `ls $home/data/etcd/member`; do
                                if [ $member == 'snap' -o $member == 'wal' ]; then
                                    continue
                                else
                                    has_child=1
                                fi
                            done

                            continue
                        else
                            has_child=1
                        fi
                    done

                    continue
                else
                    has_child=1
                fi

                has_child=1
            done
            if [ $has_child != 0 ]; then
                echo "$home/data have data!" 1>&2
                exit 1
            fi
        fi

        if [ -e "/dev/shm/lich4" ]; then
            rm -rf /dev/shm/lich4/
        fi

        if netstat -an | awk '{print $4}' | grep :%s 1>/dev/null; then
            echo "iscsi port %s already be used" 1>&2
            exit 1
        fi

        release=`python -c "import platform;print platform.dist()[0]"`
        if [ ${release} != '%s' ]; then
            echo "can not add ${release} node, this system is %s" 1>&2
            exit 1
        fi

        if ps -ef | awk '{print $8}' | grep '/sbin/lichd' 1>/dev/null; then
            echo "lichd process already running" 1>&2
            exit 1
        fi

        compare=%s
        datetime=`date +%%s`
        if [ $datetime -gt $((compare+60)) -o $datetime -lt $((compare-60)) ]; then
            echo "time is not synchronized" 1>&2
            exit 1
        fi

        ''' \
        %(self.config.home, \
          self.config.iscsi_port, \
          self.config.iscsi_port, \
          release, \
          release, \
          int(time.time()))

        try:
            # any stderr output from the script counts as a failed check
            (out, err) = _exec_remote(host, cmd)
            if err.strip() != '':
                _derror("%s:%s" % (host, err.strip()))
                exit(errno.EPERM)
        except IOError as err:
            _derror("%s:%s" % (host, str(err)))
            exit(errno.EIO)
        except Exp, e:
            _derror("%s:%s" % (host, e.err))
            exit(e.errno)
        except Exception, e:
            # NOTE(review): a generic Exception has no .errno attribute, so
            # this handler would itself raise AttributeError — confirm intent.
            _derror("%s:%s" % (host, str(e)))
            exit(e.errno)

    def __addnode(self, newlist, statue, oldlist):
        """Deploy config/binaries cluster-wide and start the new nodes.

        *statue* is passed through to _etcd(); call sites use "new" for a
        fresh cluster and "existing" when joining one.
        """
        #self._update_etc()
        self.config.hosts_load(self.config.clusterconf)
        hosts_all = self.config.hosts
        # stage the merged hosts file and fan it out to every node
        os.system("cat "+ self.config.hosts_conf +" > /tmp/hosts")
        self.__sync_hosts("/tmp/hosts", hosts_all)
        os.system("rm -f /tmp/hosts")

        hosts_new = self.config.list2dict(newlist)
        #self.ssh(hosts)
        # split the cluster into pre-existing nodes and the ones being added
        hosts_old = {}
        for host in hosts_all:
            if host not in hosts_new:
                hosts_old[host] = hosts_all[host]
        # existing nodes get a non-forced etc refresh; new nodes get a
        # forced etc + lich + qemu push
        self._update_etc(hosts_old, False)
        self._update_etc(hosts_new, True)
        self._update_lich(hosts_new, True)
        self._update_qemu(hosts_new, True)

        def _node_init_warp(k):
            # k = [host, statue, full member list]
            self._etcd(k[0], k[1], k[2])
            self._init(k[0])
            self._start(k[0])

        args = [[[k, statue, newlist + oldlist]] for (k, v) in hosts_new.items()]
        mutil_exec(_node_init_warp, args)

    def balance(self, hosts=None):
        """Rebalance metadata cluster-wide, then chunks on each selected host."""
        self._ClusterMeta__loadmeta()

        node = LichNode(debug=True)
        node.metabalance(self.admin)

        for target in self.get_hosts(hosts):
            node.chunkbalance(target)

    def recover(self, hosts=None):
        """Trigger data recovery on every selected host (default: all)."""
        self._ClusterMeta__loadmeta()

        node = LichNode(debug=True)
        for target in self.get_hosts(hosts):
            node.recover(target)

    def instences_of_host(self, _host):
        """Return the instance names reported by lich.admin that match *_host*."""
        nodes = self.__instences_by_lich_admin()
        return [name for name in nodes.keys() if name == _host]

    def drop_node(self, hosts):
        """Remove *hosts* from the cluster.

        Drops each instance via the admin tool and etcd, drops its metadata,
        rewrites the cluster config, pushes etc to the remaining nodes, and
        finally rebalances the meta nodes.  Already-dropped instances
        (ENOENT) are warned about and skipped.
        """
        instences = []
        for i in hosts:
            instences = instences + self.instences_of_host(i)

        for i in instences:
            cmd = "%s --dropnode %s"%(self.lich_admin, i)
            try:
                _exec_shell(cmd)
                # look up the member's etcd id by host name, then remove it
                cmd = "etcdctl member list | grep %s | awk -F ':' '{print $1}'" % (i)
                (uuid,err) = _exec_shell1(cmd)
                os.system("etcdctl member remove %s" % uuid)
            except Exp, e:
                if e.errno == errno.ENOENT:
                    _dwarn("%s was dropped, dropnode" % (i))
                else:
                    raise Exp(e.errno, os.strerror(e.errno))

        if (self.storage == None):
            self.storage = Storage(self.config)
        for i in instences:
            try:
                self.storage.dropmeta(i, True)
            except Exp, e:
                if e.errno == errno.ENOENT:
                    _dwarn("%s was dropped, dropmeta" % (i))
                else:
                    raise Exp(e.errno, os.strerror(e.errno))

        self.config.hosts_load()
        self.config.dropnode(hosts)
        # push the updated etc config to the surviving nodes
        cluster = os.path.join(self.config.lich, "admin/cluster.py")
        _exec_shell("%s update etc"%(cluster))

        metanode_balance()

    def is_online(self, host):
        """
        A node must not be dropped while it is unreachable, and must not be
        dropped while its service is in the running or starting state.

        Exits the process with EPERM when *host* does not answer ping;
        raises Exp(EPERM) when its node is still running or starting.
        Returns None otherwise (callers must not rely on a truthy result).
        :param host:
        :return:
        """
        if not is_ping(host):
            _derror("%s is unreachable! You can drop it with --force" % host)
            exit(errno.EPERM)

        nodes = self.__listnode(host)
        if len(nodes):
            r = nodes[0]
            if r['running'] or r['starting']:
                raise Exp(errno.EPERM, "host %s not stopped" % host)

    def __check_online(self, newlist, force):
        """Verify every host in *newlist* belongs to the cluster and refuse
        to proceed when any of them still looks online.
        """
        hosts = self.get_hosts()

        for i in newlist:
            if i not in hosts:
                raise Exp(errno.EPERM, "host %s not in cluster" % i)

        hosts_online = []
        for i in newlist:
            if force:
                # with --force only ping reachability matters
                if is_ping(i):
                    hosts_online.append(i)
            else:
                # NOTE(review): is_online() always returns None (it raises or
                # exits on problems), so this branch never appends — confirm
                # whether it is meant to call is_online() only for its side
                # effects.
                if self.is_online(i):
                    hosts_online.append(i)

        if hosts_online:
            msg = "can not use --force if the hosts online: %s" % (' '.join(hosts_online))
            raise Exp(errno.EPERM, msg)

    def drop_check(self, hosts, force=False):
        """Pre-flight checks before dropping *hosts*: membership and online
        state, plus a full offline/recovery check unless *force* is set.
        """
        _check_hosts(hosts, self.config.nohosts)

        self.__check_online(hosts, force)

        if not force:
            # drop node not use castoff temporary
            '''
            cmd = "%s --drop" % (self.node_script)
            for i in newlist:
                (out, err) = _exec_remote(i, cmd)
                print out, err
            '''
            self.__check_offline()

    def drop(self, hosts, force=False):
        """Drop the given hosts from the cluster after safety checks."""
        newlist = _star2hosts(_str2hosts(hosts))

        self.drop_check(newlist, force)

        # refuse to run on a node that is itself being deleted
        if self.config.hostname in newlist:
            errmsg = "dropnode can not be executed on a node that is being deleted"
            raise Exp(errno.EPERM, errmsg)

        self.drop_node(newlist)

    def __instences_by_lich_admin(self):
        """Return the node dict reported by the lich admin tool."""
        return LichAdmin().list_nodes()

    def dump_clusterconf(self):
        self.config.hosts_load()
        hosts_old = self.config.hosts.keys()

        hosts_new = []
        d = self.__instences_by_lich_admin()
        for k, v in d.items():
            hosts_new.append(k)
        hosts_new = list(set(hosts_new))

        for i in hosts_new:
            if i not in hosts_old:
                print 'add', i
                self.config.addnode([i])

        for i in hosts_old:
            if i not in hosts_new:
                print 'drop', i
                self.config.dropnode([i])

        cluster = os.path.join(self.config.lich, "admin/cluster.py")
        _exec_shell("%s update etc" % (cluster))

    def skip(self, host, flag):
        """Toggle skip state on *host*: flag '0' = join, '1' = leave."""
        if flag not in ('0', '1'):
            raise Exp(errno.EINVAL, 'value:%s not support' % flag)

        _check_hosts([host], self.config.nohosts)
        _exec_remote(host, "%s --skip %s" % (self.node_script, flag))

    def statnode(self, hosts):
        """Fetch `--stat --json` from the expanded host list and return the
        parsed stat of the first host.

        NOTE(review): stats are gathered for every host but only stats[0]
        is returned — confirm whether callers ever pass multiple hosts.
        """
        newlist = _str2hosts(hosts)
        newlist = _star2hosts(newlist)
        _check_hosts(newlist, self.config.nohosts)

        cmd = "%s --stat --json" % (self.node_script)
        stats = []
        for i in newlist:
            (statjson, err) = _exec_remote(i, cmd)
            stats.append(json.loads(statjson))
        return stats[0]

    def is_standalone(self, force=False):
        """Return True when the system stat reports exactly one node."""
        node_count = self.config.sysstat(force)['node'][-1]
        return node_count == 1

    def _load_hosts_nohosts(self):
        """Parse etc/hosts.conf into a {name: address} dict, cached on the
        instance after the first call.

        Lines starting with '#' and lines without exactly two fields are
        ignored; the first occurrence of a name wins.
        """
        if self.hosts_nohosts is not None:
            return self.hosts_nohosts

        conf = os.path.join(self.config.home, "etc/hosts.conf")
        parsed = {}
        with open(conf, 'r') as fp:
            for raw in fp:
                if raw.startswith('#'):
                    continue
                fields = raw.strip().split()
                if len(fields) != 2:
                    continue
                parsed.setdefault(fields[0], fields[1])

        self.hosts_nohosts = parsed
        return parsed

    def __scan(self):
        """Summarize cluster liveness: count running nodes, collect the
        meta/admin hosts, and report whether a meta majority is available.
        """
        newlist = self.config.hosts.keys()
        lst = self.__show(newlist)

        online = 0
        metas = []        # hosts whose status includes 'meta' or 'admin'
        metalist = []     # all meta instance names seen across running nodes
        for i in lst:
            if (i[u'running']):
                online = online + 1

                # 'metas' is comma-separated; drop a trailing empty entry
                m = i[u'metas'].split(',')
                if (len(m) and m[-1] == ''):
                    m.pop(-1)

                metalist = metalist + m

                if ('meta' in i[u'status'] or 'admin' in i[u'status']):
                    metas.append(i[u'hostname'])

        metalist = list(set(metalist))
        if (online == 0):
            _derror("online (%d/%d), we lost everything" % (online, len(lst)))
        elif (len(metas) < len(metalist) / 2 + 1):
            # fewer than a majority of the known meta hosts are up
            _dwarn("not enough metas online (%d/%d)" % (len(metas), len(metalist)))
            for i in metalist:
                if (i in metas):
                    _dwarn("    " + i + " : online")
                else:
                    _derror("    " + i + " : offline")
        else:
            # NOTE(review): the healthy summary is also emitted via _derror —
            # confirm whether _dmsg was intended here.
            _derror("online (%d/%d)" % (online, len(lst)))

    def scan(self, scan_deep, verbose):
        """Run the inspector's scan from '/', optionally deep and/or verbose."""
        cmd = "%s --scan /" % (self.config.inspect)
        if scan_deep:
            cmd += ' --deep'
        if verbose:
            cmd += ' --verbose'
        _exec_shell(cmd, retry=0, p=False)

    def scan_tree(self):
        """Walk the whole lichfs tree via `--find /`."""
        _exec_shell("%s --find /" % (self.config.lichfs), retry=0, p=False)

    def configdump(self):
        """Return the raw output of the admin tool's `--configdump`."""
        admin = self.config.lich + "/libexec/lich.admin"
        return _exec_pipe([admin, "--configdump"], p=False)

    def __os_rm(self, cmd):
        # Placeholder handler: only echoes the parsed command, does not
        # actually remove anything.
        print (cmd)

    def __os_ls(self, cmd):
        # Placeholder handler: only echoes the parsed command, does not list.
        print (cmd)

    def __os_echo(self, cmd):
        # Placeholder handler: only echoes the parsed command.
        print (cmd)

    def __os_cmd(self, cmd):
        """Dispatch a whitelisted os-level command (cmd[0]) to its handler.

        Raises:
            Exp(EINVAL): for any command outside the whitelist.
        """
        handlers = {
            'rm': self.__os_rm,
            'ls': self.__os_ls,
            'echo': self.__os_echo,
        }
        handler = handlers.get(cmd[0])
        if handler is None:
            raise Exp(errno.EINVAL, "not support")
        handler(cmd)

    def os_cmd(self, cmd):
        """CLI entry point for os-level commands; validates cmd[1] against
        the whitelist before dispatching.

        Bug fix: the original assigned the whitelist to ``self.os_cmd``,
        overwriting this bound method with a list on the first call and
        breaking any subsequent invocation.  The whitelist is now a local.
        """
        supported = ['rm', 'ls', 'echo']

        if (len(cmd) == 1 or cmd[1] == "--help"):
            print (supported)
        elif cmd[1] in supported:
            self.__os_cmd(cmd[1:])
        else:
            print ("invalid argument")

    def _remote_add(self, host, ext):
        """Run `--<ext>_add all` on *host* and log its stdout/stderr;
        failures are logged, never raised."""
        try:
            out, err = _exec_remote(host, "%s --%s_add all" % (self.node_script, ext))
            if out:
                _dmsg(host + ":\n" + out)
            if err:
                _derror(host + ":\n" + err)
        except (IOError, Exp) as err:
            _derror("%s add %s %s" % (host, ext, str(err)))

    def remote_add(self, ext, hosts=None):
        """Add all raid or disk devices on the given hosts (default: every
        cluster node), in parallel."""
        if ext not in ['raid', 'disk']:
            raise Exp(errno.EINVAL, 'lich add only support raid|disk operate')

        if (hosts):
            targets = _str2hosts(hosts)
            targets = _star2hosts(targets)
            _check_hosts(targets, self.config.nohosts)
        else:
            targets = list(self.config.hosts.keys())

        mutil_exec(self._remote_add, [[h, ext] for h in targets])

    def pool_list(self):
        # Print every pool name known to the pool manager, one per line.
        pool_manage = PoolManage(Config())
        pool = pool_manage.pool_list()
        for p in pool:
            print p

    def pool_create(self, pool):
        """Create storage pool *pool* via the pool manager."""
        PoolManage(Config()).pool_create(pool)

    def pool_remove_recovery_file(self, pool):
        # Purge the pool's recovery tunables directory (the one written by
        # _configset).
        os.system("rm -rf /opt/fusionstack/data/recovery/%s" % pool)

    def pool_remove(self, pool):
        # Remove the pool itself first, then its on-disk recovery settings.
        pool_manage = PoolManage(Config())
        pool_manage.pool_remove(pool)
        self.pool_remove_recovery_file(pool)

    def _configset(self, host, pool, k, v):
        """Write one recovery tunable for *pool* on *host*.

        Supported keys map to files under the pool's recovery directory;
        unknown keys are silently ignored (returns None).
        """
        prefix = '/opt/fusionstack/data'
        path = '%s/recovery/%s' % (prefix, pool)

        filenames = {
            'recovery_task_max': 'task_max',
            'recovery_scale': 'scale',
        }
        fname = filenames.get(k)
        if fname is None:
            return

        cmd = 'mkdir -p %s && echo %s > %s/%s' % (path, v, path, fname)
        return self._node_exec(host, cmd)

    def configset(self, pool, k, v, hosts=None):
        """Apply one recovery tunable for *pool* on the selected hosts in parallel."""
        targets = self._node_list(hosts)
        mutil_exec(self._configset, [[h, pool, k, v] for h in targets])

    def _cacheset(self, host, key, value):
        """Set one cache parameter on *host* via the node script."""
        return self._node_exec(host, "%s --cacheset %s %s" % (self.node_script, key, value))

    def cacheset(self, key, value):
        """Set a cache parameter on every cluster node in parallel."""
        targets = self._node_list(None)
        mutil_exec(self._cacheset, [[h, key, value] for h in targets])

    def _cacheget(self, host, key):
        """Read one cache parameter from *host* via the node script."""
        return self._node_exec(host, "%s --cacheget %s" % (self.node_script, key))

    def cacheget(self, key):
        """Read a cache parameter from every cluster node in parallel."""
        targets = self._node_list(None)
        mutil_exec(self._cacheget, [[h, key] for h in targets])

def usage(unhide):
    """Print the CLI help text; *unhide* also reveals the hidden
    maintenance commands (sshkey, deploy, skipnode, configset, ...)."""
    print ("usage:")
    print ("hosts example: dc100[0-3].rack[2-4].host[3-6,7-9] dc101[0-3].rack[2-4].host[3-6,7-9]")
    print (sys.argv[0] + " prep <hosts>")
    print (sys.argv[0] + " create <hosts>")
    print (sys.argv[0] + " update <op> [src] [--force]")
    print (sys.argv[0] + " version")
    print
    print (sys.argv[0] + " addnode <hosts>")
    print (sys.argv[0] + " dropcheck <hosts>")
    print (sys.argv[0] + " dropnode <hosts> [--force]")
    print (sys.argv[0] + " listnode")
    print (sys.argv[0] + " statnode <host> [--json]")
    print
    print (sys.argv[0] + " listpool")
    print (sys.argv[0] + " createpool <pool>")
    print (sys.argv[0] + " removepool <pool>")
    print
    print (sys.argv[0] + " add {raid|disk}")
    print
    print (sys.argv[0] + " start")
    print (sys.argv[0] + " stop")
    print (sys.argv[0] + " restart")
    print
    print (sys.argv[0] + " stat")
    print
    #print (sys.argv[0] + " stat [scan|fullscan]")
    print (sys.argv[0] + " health [scan | clean] [-v] [--json]")
    print (sys.argv[0] + " scan [tree | deep] [-v]")
    print
    print (sys.argv[0] + " log {backup|clean|tail}")
    print (sys.argv[0] + " log collect <stime> <etime>")
    print
    print (sys.argv[0] + " cacheset <key> <value>")
    print (sys.argv[0] + " cacheget <key>")
    print
    if unhide:
        print (sys.argv[0] + " sshkey <hosts>")
        print (sys.argv[0] + " deploy <host>")
        print
        print (sys.argv[0] + " skipnode <host> {1|0}")
        print
        # print (sys.argv[0] + " recover")
        # print (sys.argv[0] + " balance")
        # print
        print (sys.argv[0] + " dump_clusterconf")
        print
        print (sys.argv[0] + " configdump")
        print (sys.argv[0] + " configset <pool> <key> <value>")
        # print (sys.argv[0] + " unhide")


def doit(cluster, options, args):
    """Dispatch one command-line operation to *cluster*.

    cluster -- Cluster instance the operation is invoked on
    options -- parsed OptionParser options (force/json/verbose/human/password)
    args    -- positional arguments; args[0] is the operation name

    Exits the process with an errno-style status on invalid arguments;
    Exp raised by cluster operations propagates to the caller (main()).
    """
    op = args[0]
    ext = args[1:]
    force = options.force
    is_json = options.json
    verbose = options.verbose
    passwd = options.password

    if op == 'init':
        cluster.init()
    elif op == 'sshkey':
        newlist = _str2hosts(ext)
        cluster.sshkey(newlist, passwd)
    elif op == 'prep':
        cluster.prep(ext, passwd)
    elif op == 'create':
        cluster.create(ext)
    elif op == 'deploy':
        # Usage is "deploy <host>", so the host is the first extra argument.
        # (ext[1] was an off-by-one that raised IndexError for the
        # documented invocation.)
        cluster.deploy(ext[0])
    elif op == 'update':
        try:
            op = ext[0]
        except IndexError:
            op = 'lich'

        try:
            src = ext[1]
        except IndexError:
            src = None

        # "update foo.tar.gz" is shorthand for "update lich foo.tar.gz".
        if op.endswith('.tar.gz') and src is None:
            src = op
            op = 'lich'

        cluster.update(op, src, force=force)
    elif op == 'version':
        cluster.version()
    elif op == 'stat':
        cluster.show(options.human, options.verbose)
    elif op == 'list':
        if ext:
            cluster.list_xx(ext[0])
        else:
            cluster.listnode(ext, verbose=verbose)
    elif op == 'configdump':
        print(cluster.configdump())
    elif op == 'dump_clusterconf':
        cluster.dump_clusterconf()
    elif op == 'ucarp':
        cluster.ucarp()

    elif op == 'start':
        cluster.start(ext)
    elif op == 'stop':
        cluster.stop(ext)
    elif op == 'restart':
        cluster.restart(ext)
    elif op == 'clean_cluster_dangerously':
        cluster.clean_cluster_dangerously()

    elif op == 'addnode':
        cluster.addnode(ext, force)
    elif op == 'dropcheck':
        try:
            cluster.drop_check(ext, force)
        except Exp as err:
            _derror("check %s fail: %s" % (ext, str(err)))
            exit(err.errno)
    elif op == 'dropnode':
        try:
            cluster.drop(ext, force)
        except Exp as err:
            _derror("drop %s fail: %s" % (ext, str(err)))
            exit(err.errno)
    elif op == 'skipnode':
        host = ext[0]
        flag = ext[1]
        cluster.skip(host, flag)
    elif op == 'listnode':
        cluster.listnode(ext, verbose)
    elif op == 'statnode':
        hosts = ext
        stats = cluster.statnode(hosts)
        if is_json:
            # compact single-line output for machine consumption
            print(json.dumps(stats))
        else:
            print(json.dumps(stats, sort_keys=False, indent=4))

    elif op == 'listpool':
        cluster.pool_list()
    elif op == 'createpool':
        if (len(ext) != 1):
            _derror('Invalid pool:%s' % ext)
            exit(errno.EINVAL)
        cluster.pool_create(ext[0])
    elif op == 'removepool':
        if (len(ext) != 1):
            _derror('Invalid pool:%s' % ext)
            exit(errno.EINVAL)
        cluster.pool_remove(ext[0])

    elif op == 'add':
        cluster.remote_add(ext[0])

    elif op == 'health':
        if 'clean' in ext:
            cluster.node_clean()
            health = cluster.health(['clean'])
        else:
            # [] or ['scan']
            health = cluster.health(ext)

        if is_json:
            print(json.dumps(health))
        else:
            analysis = cluster.analysis_health(health)
            cluster.show_health(analysis, verbose, ext)

    elif op == 'scan':
        if 'tree' in ext:
            cluster.scan_tree()
        else:
            scan_deep = 'deep' in ext
            cluster.scan(scan_deep, verbose)
    elif op == 'recover':
        cluster.recover()
    elif op == 'balance':
        cluster.balance()

    elif op == 'log':
        # "lich log" with no sub-command used to die with a raw IndexError.
        if len(args) < 2:
            _derror('Invalid argument')
            exit(errno.EINVAL)
        subcmd = args[1]
        if subcmd == 'backup':
            cluster.log_backup()
        elif subcmd == 'clean':
            cluster.log_clean()
        elif subcmd == 'tail':
            cluster.log_tail()
        elif subcmd == 'collect':
            if len(ext) != 3:
                _derror('Invalid argument')
                print('For example: ')
                print('     lich log collect "2016-06-07 11:17:56" "2016-06-07 11:17:59"')
                exit(errno.EINVAL)
            else:
                stime = args[2]
                etime = args[3]
                try:
                    begin = time.mktime(time.strptime(stime, '%Y-%m-%d %H:%M:%S'))
                    end = time.mktime(time.strptime(etime, '%Y-%m-%d %H:%M:%S'))
                    now = time.time()
                    if (begin > now):
                        _derror('Invalid begin time:(%s)' % stime)
                        exit(errno.EINVAL)
                    if (end < begin):
                        # report the end time when the range is inverted,
                        # not the begin time
                        _derror('Invalid end time:(%s)' % etime)
                        exit(errno.EINVAL)
                except (ValueError, OverflowError):
                    # strptime raises ValueError on format mismatch,
                    # mktime may raise OverflowError/ValueError
                    _derror("time data does not match format '%Y-%m-%d %H:%M:%S'")
                    print('such as: ')
                    print('     lich log collect "2016-06-07 11:17:56" "2016-06-07 11:17:59"')
                    exit(errno.EPERM)
                cluster.log_collect(stime, etime)
    elif op == 'configset':
        cluster.configset(args[1], args[2], args[3])
    elif op == 'cacheset':
        if (len(ext) != 2):
            _derror('Invalid argument')
            print('For example: ')
            print('     lich cacheset writeback_percent 0   #make flush quickly, default value is 10')
            print('     lich cacheset sequential_cutoff 4k  #set sequential cutoff, default value is 4M')
            exit(errno.EINVAL)
        cluster.cacheset(ext[0], ext[1])
    elif op == 'cacheget':
        if (len(ext) != 1):
            _derror('Invalid argument')
            print('For example: ')
            print('     lich cacheget [writeback_percent|sequential_cutoff|dirty_data]')
            exit(errno.EINVAL)
        cluster.cacheget(ext[0])


#@timeit()
def main():
    """Validate the requested operation and hand it to doit().

    Reads the module-level *args*/*options* produced by OptionParser in
    the __main__ block; exits with the Exp errno when an operation fails.
    """
    known_ops = frozenset([
        'deploy', 'sshkey', 'init', 'prep', 'create', 'update', 'version',

        'start', 'stop', 'restart', 'clean_cluster_dangerously',

        'stat', 'list', 'configdump', 'dump_clusterconf', 'ucarp',

        'addnode', 'dropcheck', 'dropnode', 'skipnode', 'listnode',
        'statnode',

        'listpool', 'createpool', 'removepool',

        'add',

        'health', 'scan', 'recover', 'balance',

        'log', 'configset',

        'cacheset', 'cacheget',

        'host', 'human-unreadable', 'verbose', 'unhide', 'help',
    ])

    if args[0] not in known_ops:
        _derror("Error: Invalid command!")
        exit(errno.EINVAL)

    try:
        doit(Cluster(), options, args)
    except Exp as e:
        _derror(e.err)
        exit(e.errno)


if __name__ == '__main__':
    # Command-line flags shared by every sub-command.
    opt_parser = OptionParser()
    opt_parser.add_option('-m', '--human-unreadable', action='store_false',
                          dest='human', default=True, help='human unreadable')
    opt_parser.add_option('-v', '--verbose', action='store_true',
                          dest='verbose', default=False, help='verbose')
    opt_parser.add_option('-f', '--force', action='store_true',
                          dest='force', default=False, help='force')
    opt_parser.add_option('-j', '--json', action='store_true',
                          dest='json', default=False, help='json')
    opt_parser.add_option('-u', '--unhide', action='store_true',
                          dest='unhide', default=False, help='unhide')
    opt_parser.add_option('-p', '--password', action='store', type='string',
                          dest='password', default='')
    # main() and doit() read these two names as module-level globals.
    options, args = opt_parser.parse_args()

    if args:
        main()
    else:
        # No command given: show public (or full, with -u) usage.
        usage(options.unhide)
